###
# Replication file for 
# "What Makes Foreign Policy Teams Tick.."
# 5/19/2018
###
#Replication materials for STM analysis 


# Package setup.
# The analysis is pinned to stm 1.3.0. Install that release only when it is not
# already the installed version, so re-running the script does not reinstall
# the package every time. The version is read from the installed DESCRIPTION
# without loading stm first.
library(devtools)
stm_version <- "1.3.0"
if (!nzchar(system.file(package = "stm")) ||
    utils::packageVersion("stm") != stm_version) {
  install_version("stm", version = stm_version, repos = "http://cran.us.r-project.org")
}
library(stm)

# Start from an empty workspace before loading the replication objects.
rm(list = ls())

#set your own directory here
setwd("C:/Users/dtingley/Dropbox/GJP Text Analysis/scripts/JoPReplicationSTM")

# Restore the saved pre-estimation workspace. The objects used next in this
# script (documents, vocab, metadata) are presumably supplied by this file --
# confirm against the .RData contents.
load("JoPSTMReplication-preSTM.RData")
 

# Run STM model
# 45 topics with spectral initialisation; topic prevalence is modelled as a
# function of the Condition and ifp_id columns of metadata.
stm.out.45 <- stm(
  documents = documents,
  vocab = vocab,
  K = 45,
  prevalence = ~ Condition + ifp_id,
  data = metadata,
  init.type = "Spectral"
)

# Store the fitted model together with the inputs it was estimated from.
save(documents, metadata, vocab, stm.out.45, file = "STM-JOPReplication.RData")


### 
#post estimation analysis
###

# Short alias for the fitted model; the rest of the script refers to it as z.
z <- stm.out.45

# Topic labels for the fitted model.
labelTopics(z)

# Figures from here on are written into the paper's figure directory.
setwd("C:/Users/dtingley/Dropbox/GJP Text Analysis/paper/paper1/finalfigures")

# Summary plot of the model, written to a tall (12 inch) PDF.
pdf(file = "TopTopics-JoP-2.pdf", height = 12)
plot(z, type = "summary")
dev.off()

 


# estimate covariate effects
# With uncertainty = "Global", estimateEffect() propagates uncertainty in the
# topic proportions by simulation, so the estimates depend on the state of the
# random number generator. Fix the seed so that repeated runs of this
# replication script give the same estimates.
set.seed(20180519)
prep <- estimateEffect(~ Condition + ifp_id, stmobj = z, metadata = metadata,
                       uncertainty = "Global")

# Passed as max.words to the stm::cloud() calls later in the script.
m <- 45

###
#Word clouds and effects
###


# Six-panel figure on a 2 x 3 grid (filled row by row): three word clouds on
# the top row, three Condition effect plots on the bottom row.
pdf(file = "Cloud-45topic20thresh-Team-JoP.pdf")
par(mfrow = c(2, 3))

# Custom axis labels for the effect plots: r names the conditions, g is blank.
r <- c("Team No Train", "Top Team", "Team Train")
g <- c("", "", "", "", "")

# NOTE(review): the first cloud shows topic 2, while the first effect panel and
# the regressions later in this script use topic 1; the first two clouds also
# share the title "Norms". Left unchanged -- confirm against the paper.
stm::cloud(z, topic = 2, main = "Norms", max.words = m)
stm::cloud(z, topic = 12, main = "Norms", max.words = m)
stm::cloud(z, topic = 39, main = "Players", max.words = m)

# TRUE is spelled out: T is an ordinary variable that can be reassigned.
plot(prep, model = z, topics = 1, verbose.labels = TRUE,
     covariate = "Condition", method = "pointestimate",
     main = "Topic 1: Teamwork", labeltype = "custom", custom.labels = g)
plot(prep, model = z, topics = 12, verbose.labels = TRUE,
     covariate = "Condition", method = "pointestimate",
     main = "Topic 12: Analysis", labeltype = "custom", custom.labels = r)
plot(prep, model = z, topics = 39, verbose.labels = TRUE,
     covariate = "Condition", method = "pointestimate",
     main = "Topic 39: Analysis", labeltype = "custom", custom.labels = g)
dev.off()


###
# Example quotes
###

# Take the 20 documents returned by findThoughts() for topic 39 and plot the
# fourth of them.
f <- findThoughts(z, texts = metadata$body, topics = 39, n = 20)[["docs"]][[1]]
plotQuote(f[4], width = 70, xlab = "Topic 39")

# Same for topic 12: 10 documents, plotting the fifth and seventh.
f <- findThoughts(z, texts = metadata$body, topics = 12, n = 10)[["docs"]][[1]]
plotQuote(f[c(5, 7)], width = 70, xlab = "Topic 12")
 

 

 
###
#Regression analyses
###

# Unique team identifier: year and ctt joined with a dash.
metadata$yr_team <- paste(metadata$year, metadata$ctt, sep = "-")

# Rescale the DV by flipping its sign.
metadata$score <- -(metadata$score)

# Document length: sum of the second row of each document matrix.
lengths <- unlist(lapply(documents, function(doc) sum(doc[2, ])))
metadata$length <- lengths

# Whole days between date_start and date_posted, stored as a plain number.
metadata$daysince <- as.numeric(
  floor(as.Date(metadata$date_posted) - as.Date(metadata$date_start))
)
metadata$yearchar <- as.character(metadata$year)

# Extract the theta loadings for topics 12, 1 and 39 and append them to
# metadata as columns (in that order).
Topic12 <- z$theta[, 12]
Topic1 <- z$theta[, 1]
Topic39 <- z$theta[, 39]
metadata <- cbind(metadata, Topic12, Topic1, Topic39)


 

#####
# SEs using rms package
######
library("texreg")
library("rms")

# Cluster identifier for the robust standard errors: the user_id_anon column,
# one entry per row of metadata. It is subset to the SuperTeam rows before
# being passed to crob() further down.
cid <- metadata$user_id_anon
# Cluster-robust standard errors and p-values for a fitted model.
#
# Args:
#   model: a fit accepted by robcov() (this script passes ols() fits estimated
#          with x=TRUE, y=TRUE).
#   cid:   cluster identifier, passed to robcov() as `cluster`.
#
# Returns a list with two elements:
#   se:      square roots of the diagonal of the robcov() covariance matrix
#   pvalues: p-values of the Wald statistic (coefficient / se)^2 on 1 df
crob <- function(model, cid) {
  obj <- robcov(model, cluster = cid)
  # Use the full element name rather than relying on `$` partial matching.
  beta <- obj$coefficients
  se <- sqrt(diag(vcov(obj)))
  Z <- beta / se
  # Ask for the upper tail directly: 1 - pchisq() rounds to exactly 0 for large
  # statistics, which throws away small p-values.
  P <- pchisq(Z^2, df = 1, lower.tail = FALSE)
  list(se = se, pvalues = P)
}



 
# OLS models of score for the SuperTeam condition, with standard errors
# clustered on the user id (cid).
#
# The same row mask is used for the data and for the cluster ids. %in% never
# returns NA, so a missing Condition drops the row from both; subsetting the
# data with subset() and the ids with a raw logical index would keep NA entries
# in the ids only and leave the two out of step.
is_super_team <- metadata$Condition %in% "SuperTeam"
super_team_data <- metadata[is_super_team, , drop = FALSE]
super_team_cid <- cid[is_super_team]

# Topic 1 interacted with topic 12.
SuperTeam12 <- ols(score ~ Topic1 * Topic12 + as.numeric(daysince) + ifp_id + year + length,
                   data = super_team_data, x = TRUE, y = TRUE)
SuperTeam12.se <- crob(SuperTeam12, super_team_cid)

# Topic 1 interacted with topic 39.
SuperTeam39 <- ols(score ~ Topic1 * Topic39 + as.numeric(daysince) + ifp_id + year + length,
                   data = super_team_data, x = TRUE, y = TRUE)
SuperTeam39.se <- crob(SuperTeam39, super_team_cid)

# Scratch names kept so that anything reading them still sees the last model.
SuperTeam <- SuperTeam39
SuperTeam.se <- SuperTeam39.se



#Regression table with only super team
# Regression table for the two SuperTeam models, written as LaTeX. Standard
# errors and p-values are replaced by the cluster-robust ones from crob(), and
# coefficients matching "ifp" are left out of the table.
# NOTE(review): omit.stat looks like a stargazer-style argument; confirm that
# texreg actually honours it.
ti <- c("1", "2")
texreg(
  list(SuperTeam12, SuperTeam39),
  file = "reg-topics-super-JoP-Final.tex",
  caption = "OLS models of topics and accuracy", # ci.force=TRUE,
  label = "tab:regtopics3",
  custom.model.names = ti,
  stars = c(0.01, 0.05, 0.1),
  override.se = list(SuperTeam12.se$se, SuperTeam39.se$se),
  override.pval = list(SuperTeam12.se$pvalues, SuperTeam39.se$pvalues),
  omit.coef = "ifp",
  omit.stat = c("f", "ser")
)



